0edb4d
@@ -32,6 +32,7 @@
 import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.ForwardOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
@@ -64,11 +65,11 @@
 public class ReduceSinkDeDuplication implements Transform{
 
   protected ParseContext pGraphContext;
-  
+
   @Override
   public ParseContext transform(ParseContext pctx) throws SemanticException {
     pGraphContext = pctx;
-    
+
  // generate pruned column list for all relevant operators
     ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext);
 
@@ -88,7 +89,7 @@
public ParseContext transform(ParseContext pctx) throws SemanticException {
     ogw.startWalking(topNodes, null);
     return pGraphContext;
   }
-  
+
   class ReduceSinkDeduplicateProcCtx implements NodeProcessorCtx{
     ParseContext pctx;
     List<ReduceSinkOperator> rejectedRSList;
@@ -97,11 +98,11 @@
public ReduceSinkDeduplicateProcCtx(ParseContext pctx) {
       rejectedRSList = new ArrayList<ReduceSinkOperator>();
       this.pctx = pctx;
     }
-    
+
     public boolean contains (ReduceSinkOperator rsOp) {
       return rejectedRSList.contains(rsOp);
     }
-    
+
     public void addRejectedReduceSinkOperator(ReduceSinkOperator rsOp) {
       if (!rejectedRSList.contains(rsOp)) {
         rejectedRSList.add(rsOp);
@@ -116,10 +117,10 @@
public void setPctx(ParseContext pctx) {
       this.pctx = pctx;
     }
   }
-  
-  
+
+
   static class ReduceSinkDeduplicateProcFactory {
-    
+
 
     public static NodeProcessor getReducerReducerProc() {
       return new ReducerReducerProc();
@@ -140,7 +141,7 @@
public Object process(Node nd, Stack<Node> stack,
         return null;
       }
     }
-    
+
     static class ReducerReducerProc implements NodeProcessor {
       @Override
       public Object process(Node nd, Stack<Node> stack,
@@ -148,11 +149,17 @@
public Object process(Node nd, Stack<Node> stack,
           throws SemanticException {
         ReduceSinkDeduplicateProcCtx ctx = (ReduceSinkDeduplicateProcCtx) procCtx;
         ReduceSinkOperator childReduceSink = (ReduceSinkOperator)nd;
-        
+
         if(ctx.contains(childReduceSink)) {
           return null;
         }
-        
+
+        List<Operator<? extends Serializable>> childOp = childReduceSink.getChildOperators();
+        if (childOp != null && childOp.size() == 1 && childOp.get(0) instanceof GroupByOperator) {
+          ctx.addRejectedReduceSinkOperator(childReduceSink);
+          return null;
+        }
+
         ParseContext pGraphContext = ctx.getPctx();
         HashMap<String, String> childColumnMapping = getPartitionAndKeyColumnMapping(childReduceSink);
         ReduceSinkOperator parentRS = null;
@@ -171,7 +178,7 @@
public Object process(Node nd, Stack<Node> stack,
         } else {
           stopBacktrackFlagOp = parentRS.getParentOperators().get(0);
         }
-        
+
         boolean succeed = backTrackColumnNames(childColumnMapping, childReduceSink, stopBacktrackFlagOp, pGraphContext);
         if (!succeed) {
           return null;
@@ -180,7 +187,7 @@
public Object process(Node nd, Stack<Node> stack,
         if (!succeed) {
           return null;
         }
-        
+
         boolean same = compareReduceSink(childReduceSink, parentRS, childColumnMapping, parentColumnMapping);
         if (!same) {
           return null;
@@ -193,18 +200,18 @@
private void replaceReduceSinkWithSelectOperator(
           ReduceSinkOperator childReduceSink, ParseContext pGraphContext) throws SemanticException {
         List<Operator<? extends Serializable>> parentOp = childReduceSink.getParentOperators();
         List<Operator<? extends Serializable>> childOp = childReduceSink.getChildOperators();
-        
+
         Operator<? extends Serializable> oldParent = childReduceSink;
-        
+
         if (childOp != null && childOp.size() == 1
             && ((childOp.get(0)) instanceof ExtractOperator)) {
           oldParent = childOp.get(0);
           childOp = childOp.get(0).getChildOperators();
         }
-        
+
         Operator<? extends Serializable> input = parentOp.get(0);
         input.getChildOperators().clear();
-        
+
         RowResolver inputRR = pGraphContext.getOpParseCtx().get(input).getRowResolver();
 
         ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
@@ -239,9 +246,9 @@
private void replaceReduceSinkWithSelectOperator(
         for (Operator<? extends Serializable> ch : childOp) {
           ch.replaceParent(oldParent, sel);
         }
-        
+
       }
-      
+
       private Operator<? extends Serializable> putOpInsertMap(
           Operator<? extends Serializable> op, RowResolver rr, ParseContext pGraphContext) {
         OpParseContext ctx = new OpParseContext(rr);
@@ -253,16 +260,16 @@
private boolean compareReduceSink(ReduceSinkOperator childReduceSink,
           ReduceSinkOperator parentRS,
           HashMap<String, String> childColumnMapping,
           HashMap<String, String> parentColumnMapping) {
-        
+
         ArrayList<ExprNodeDesc> childPartitionCols = childReduceSink.getConf().getPartitionCols();
         ArrayList<ExprNodeDesc> parentPartitionCols = parentRS.getConf().getPartitionCols();
-        
+
         boolean ret = compareExprNodes(childColumnMapping, parentColumnMapping,
             childPartitionCols, parentPartitionCols);
         if (!ret) {
           return false;
         }
-        
+
         ArrayList<ExprNodeDesc> childReduceKeyCols = childReduceSink.getConf().getKeyCols();
         ArrayList<ExprNodeDesc> parentReduceKeyCols = parentRS.getConf().getKeyCols();
         ret = compareExprNodes(childColumnMapping, parentColumnMapping,
@@ -270,7 +277,7 @@
private boolean compareReduceSink(ReduceSinkOperator childReduceSink,
         if (!ret) {
           return false;
         }
-        
+
         String childRSOrder = childReduceSink.getConf().getOrder();
         String parentRSOrder = parentRS.getConf().getOrder();
         boolean moveChildRSOrderToParent = false;
@@ -285,14 +292,14 @@
private boolean compareReduceSink(ReduceSinkOperator childReduceSink,
             moveChildRSOrderToParent = true;
           }
         }
-        
+
         int childNumReducers = childReduceSink.getConf().getNumReducers();
         int parentNumReducers = parentRS.getConf().getNumReducers();
         boolean moveChildReducerNumToParent = false;
         //move child reduce sink's number reducers to the parent reduce sink operator.
         if (childNumReducers != parentNumReducers) {
           if (childNumReducers == -1) {
-            //do nothing. 
+            //do nothing.
           } else if (parentNumReducers == -1) {
             //set childNumReducers in the parent reduce sink operator.
             moveChildReducerNumToParent = true;
@@ -300,15 +307,15 @@
private boolean compareReduceSink(ReduceSinkOperator childReduceSink,
             return false;
           }
         }
-        
+
         if(moveChildRSOrderToParent) {
-          parentRS.getConf().setOrder(childRSOrder);          
+          parentRS.getConf().setOrder(childRSOrder);
         }
-        
+
         if(moveChildReducerNumToParent) {
           parentRS.getConf().setNumReducers(childNumReducers);
         }
-        
+
         return true;
       }
 
@@ -316,14 +323,14 @@
private boolean compareExprNodes(HashMap<String, String> childColumnMapping,
           HashMap<String, String> parentColumnMapping,
           ArrayList<ExprNodeDesc> childColExprs,
           ArrayList<ExprNodeDesc> parentColExprs) {
-        
+
         boolean childEmpty = childColExprs == null || childColExprs.size() == 0;
         boolean parentEmpty = parentColExprs == null || parentColExprs.size() == 0;
-        
+
         if (childEmpty) { //both empty
           return true;
         }
-        
+
         //child not empty here
         if (parentEmpty) { // child not empty, but parent empty
           return false;
@@ -383,13 +390,13 @@
private boolean backTrackColumnNames(
             }
           }
         }
-        
+
         return true;
       }
 
       private HashMap<String, String> getPartitionAndKeyColumnMapping(ReduceSinkOperator reduceSink) {
         HashMap<String, String> columnMapping = new HashMap<String, String> ();
-        ReduceSinkDesc reduceSinkDesc = reduceSink.getConf();        
+        ReduceSinkDesc reduceSinkDesc = reduceSink.getConf();
         ArrayList<ExprNodeDesc> partitionCols = reduceSinkDesc.getPartitionCols();
         ArrayList<ExprNodeDesc> reduceKeyCols = reduceSinkDesc.getKeyCols();
         if(partitionCols != null) {
@@ -419,7 +426,7 @@
private ReduceSinkOperator findSingleParentReduceSink(ReduceSinkOperator childRe
             // this potentially is a join operator
             return null;
           }
-          
+
           boolean allowed = false;
           if ((start instanceof SelectOperator)
               || (start instanceof FilterOperator)
@@ -429,17 +436,17 @@
private ReduceSinkOperator findSingleParentReduceSink(ReduceSinkOperator childRe
               || (start instanceof ReduceSinkOperator)) {
             allowed = true;
           }
-          
+
           if (!allowed) {
             return null;
           }
-          
+
           if ((start instanceof ScriptOperator)
               && !HiveConf.getBoolVar(pGraphContext.getConf(),
                   HiveConf.ConfVars.HIVESCRIPTOPERATORTRUST)) {
             return null;
           }
-          
+
           start = start.getParentOperators().get(0);
           if(start instanceof ReduceSinkOperator) {
             return (ReduceSinkOperator)start;
@@ -448,6 +455,6 @@
private ReduceSinkOperator findSingleParentReduceSink(ReduceSinkOperator childRe
         return null;
       }
     }
-    
+
   }
 }
